Skip to main content

Image Processing

This section documents the image processing capabilities available in the codebase.

Table of Contents

Image Captioning

File: models/image/image_caption.py

This module combines AWS Rekognition for identifying labels in images with OpenAI's GPT for generating natural language captions.

Initialization

from models.image.image_caption import ImageCaptioning

# Initialize with AWS and OpenAI credentials
image_captioner = ImageCaptioning(
    aws_access_key_id='YOUR_AWS_ACCESS_KEY_ID',
    aws_secret_access_key='YOUR_AWS_SECRET_ACCESS_KEY',
    region_name='YOUR_AWS_REGION',
    openai_api_key='YOUR_OPENAI_API_KEY'
)

Generate Caption

# Load image bytes
with open('path_to_image.jpg', 'rb') as image_file:
    image_bytes = image_file.read()

# Generate caption
caption = image_captioner.caption_image(image_bytes)
print(caption)

How It Works

The captioning process follows these steps:

  1. The image is sent to AWS Rekognition to detect labels (objects, scenes, concepts)
  2. The detected labels are formatted into a comma-separated list
  3. This list is sent to OpenAI with a prompt to generate a descriptive caption
  4. The generated text is returned as the image caption

Color Scheme Extraction

File: models/image/image_color_theme.py

This module extracts dominant colors from images using k-means clustering.

Initialization

from models.image.image_color_theme import ImageColorScheme

# Initialize with the desired number of colors to extract
color_scheme = ImageColorScheme(num_colors=5)

Extract Colors from File

# Extract dominant colors and get hex codes
colors = color_scheme.get_hex_colors('path_to_image.jpg')
print(colors) # ['#ff5733', '#33ff57', '#5733ff', '#ff33a6', '#33a6ff']

Extract Colors from Image Bytes

# Extract colors from image bytes
with open('path_to_image.jpg', 'rb') as image_file:
    image_bytes = image_file.read()

colors = color_scheme.get_hex_colors_from_bytes(image_bytes)
print(colors)

Raw RGB Colors

# Get RGB values instead of hex codes
raw_colors = color_scheme.extract_colors('path_to_image.jpg')
print(raw_colors) # [[255, 87, 51], [51, 255, 87], ...]

How It Works

The color extraction process works as follows:

  1. The image is loaded and resized for efficient processing
  2. The image colors are converted to RGB format
  3. K-means clustering is applied to identify the dominant color clusters
  4. The center of each cluster is extracted as a representative color
  5. Colors are converted to hex format for easy use in web and UI applications

AWS Rekognition

File: models/aws/rekognition.py

This module provides a client for Amazon Rekognition, AWS's computer vision service, offering a wide range of image and video analysis capabilities.

Initialization

from models.aws.rekognition import RekognitionClient

# Initialize with AWS credentials
rekognition = RekognitionClient(
    aws_access_key_id='YOUR_AWS_ACCESS_KEY_ID',
    aws_secret_access_key='YOUR_AWS_SECRET_ACCESS_KEY',
    region_name='YOUR_AWS_REGION'
)

Detect Labels

# Load image bytes
with open('path_to_image.jpg', 'rb') as image_file:
    image_bytes = image_file.read()

# Detect labels in the image
labels = rekognition.detect_labels(
    image_bytes=image_bytes,
    max_labels=10,      # Maximum number of labels to return
    min_confidence=80   # Minimum confidence percentage
)
print(labels)

Example response:

[
    {
        'Name': 'Car',
        'Confidence': 99.15271759033203,
        'Instances': [...],
        'Parents': [{'Name': 'Vehicle'}, {'Name': 'Transportation'}]
    },
    {
        'Name': 'Automobile',
        'Confidence': 99.15271759033203,
        'Instances': [],
        'Parents': [{'Name': 'Vehicle'}, {'Name': 'Transportation'}]
    },
    # ...
]

Detect Faces

# Detect faces in the image
faces = rekognition.detect_faces(image_bytes)
print(faces)

Example response:

[
    {
        'BoundingBox': {
            'Width': 0.6954022645950317,
            'Height': 0.2544529736042023,
            'Left': 0.1633375883102417,
            'Top': 0.1475013792514801
        },
        'AgeRange': {'Low': 20, 'High': 30},
        'Smile': {'Value': True, 'Confidence': 96.94185638427734},
        'Eyeglasses': {'Value': False, 'Confidence': 99.69209289550781},
        'Sunglasses': {'Value': False, 'Confidence': 99.9991226196289},
        'Gender': {'Value': 'Female', 'Confidence': 99.99555969238281},
        'Beard': {'Value': False, 'Confidence': 99.97463989257812},
        'Mustache': {'Value': False, 'Confidence': 99.98968505859375},
        'EyesOpen': {'Value': True, 'Confidence': 99.99991607666016},
        'MouthOpen': {'Value': True, 'Confidence': 94.05339813232422},
        'Emotions': [
            {'Type': 'HAPPY', 'Confidence': 99.9469223022461},
            {'Type': 'CALM', 'Confidence': 0.23001517355442047},
            # ...
        ],
        'Landmarks': [
            {'Type': 'eyeLeft', 'X': 0.3295428156852722, 'Y': 0.2268327772617},
            {'Type': 'eyeRight', 'X': 0.6795527935028076, 'Y': 0.2295929193496704},
            # ...
        ],
        'Pose': {
            'Roll': -0.5577205419540405,
            'Yaw': -0.9853221774101257,
            'Pitch': 2.6768236160278
        },
        'Quality': {'Brightness': 43.768043518066406, 'Sharpness': 99.95819854736328},
        'Confidence': 99.99998474121094
    }
]

Detect Text

# Detect text in the image
text = rekognition.detect_text(image_bytes)
print(text)

Example response:

[
    {
        'DetectedText': 'HELLO',
        'Type': 'LINE',
        'Id': 0,
        'Confidence': 99.35721588134766,
        'Geometry': {...}
    },
    {
        'DetectedText': 'WORLD',
        'Type': 'LINE',
        'Id': 1,
        'Confidence': 99.6502914428711,
        'Geometry': {...}
    }
]

Compare Faces

# Load source and target images
with open('source_image.jpg', 'rb') as source_file:
    source_image = source_file.read()

with open('target_image.jpg', 'rb') as target_file:
    target_image = target_file.read()

# Compare faces between the two images
matches = rekognition.compare_faces(
    source_image_bytes=source_image,
    target_image_bytes=target_image,
    similarity_threshold=90  # Only return matches with similarity >= 90%
)
print(matches)

Detect Moderation Labels

# Check for inappropriate content
moderation_labels = rekognition.detect_moderation_labels(
    image_bytes=image_bytes,
    min_confidence=80
)
print(moderation_labels)

Recognize Celebrities

# Identify celebrities in the image
celebrities = rekognition.recognize_celebrities(image_bytes)
print(celebrities)

Color Scheme Analysis

# Identify the main colors in the image
colors = rekognition.identify_color_scheme(image_bytes, num_colors=5)
print(colors)

Example response:

[
    {'color': (240, 248, 255), 'count': 7890},  # RGB values and pixel count
    {'color': (30, 144, 255), 'count': 4567},
    {'color': (255, 255, 0), 'count': 2345},
    {'color': (255, 0, 0), 'count': 1234},
    {'color': (0, 0, 0), 'count': 567}
]

Detect Personal Protective Equipment

# Identify people and check if they're wearing PPE
ppe_detection = rekognition.detect_protective_equipment(image_bytes)
print(ppe_detection)

Advanced Use Cases

Multi-Step Processing

You can combine these services for more complex use cases:

# Analyze an image comprehensively
def analyze_image(image_path):
    with open(image_path, 'rb') as image_file:
        image_bytes = image_file.read()

    # Initialize services
    rekognition = RekognitionClient(...)
    color_scheme = ImageColorScheme(num_colors=5)

    # Get various analyses
    labels = rekognition.detect_labels(image_bytes)
    faces = rekognition.detect_faces(image_bytes)
    text = rekognition.detect_text(image_bytes)
    colors = color_scheme.get_hex_colors_from_bytes(image_bytes)

    # Combine results
    return {
        "labels": labels,
        "face_count": len(faces),
        "emotions": [face.get('Emotions', []) for face in faces],
        "text_content": [t['DetectedText'] for t in text],
        "color_palette": colors
    }

Image Content Moderation

def is_image_appropriate(image_bytes):
    rekognition = RekognitionClient(...)
    moderation_labels = rekognition.detect_moderation_labels(image_bytes)

    # Check for inappropriate content
    for label in moderation_labels:
        if label['Confidence'] > 80 and label['Name'] in ['Explicit Nudity', 'Violence', 'Drugs']:
            return False, label['Name']

    return True, None

Back to Main Index